PyStan 3.0 and later are not currently supported by Prophet, so pin PyStan to 2.19.1.1 before installing Prophet:
pip uninstall pystan --yes && pip install pystan==2.19.1.1 && pip install prophet
pip uninstall pystan --yes && pip install pystan==2.19.1.1 && pip install prophet
Found existing installation: pystan 2.19.1.1
Uninstalling pystan-2.19.1.1:
Successfully uninstalled pystan-2.19.1.1
Collecting pystan==2.19.1.1
Downloading pystan-2.19.1.1-cp37-cp37m-manylinux1_x86_64.whl (67.3 MB)
|████████████████████████████████| 67.3 MB 1.3 MB/s
Requirement already satisfied: numpy>=1.7 in /usr/local/lib/python3.7/dist-packages (from pystan==2.19.1.1) (1.19.5)
Requirement already satisfied: Cython!=0.25.1,>=0.22 in /usr/local/lib/python3.7/dist-packages (from pystan==2.19.1.1) (0.29.24)
Installing collected packages: pystan
Successfully installed pystan-2.19.1.1
Collecting prophet
Downloading prophet-1.0.1.tar.gz (65 kB)
|████████████████████████████████| 65 kB 2.1 MB/s
Requirement already satisfied: Cython>=0.22 in /usr/local/lib/python3.7/dist-packages (from prophet) (0.29.24)
Collecting cmdstanpy==0.9.68
Downloading cmdstanpy-0.9.68-py3-none-any.whl (49 kB)
|████████████████████████████████| 49 kB 4.7 MB/s
Requirement already satisfied: pystan~=2.19.1.1 in /usr/local/lib/python3.7/dist-packages (from prophet) (2.19.1.1)
Requirement already satisfied: numpy>=1.15.4 in /usr/local/lib/python3.7/dist-packages (from prophet) (1.19.5)
Requirement already satisfied: pandas>=1.0.4 in /usr/local/lib/python3.7/dist-packages (from prophet) (1.1.5)
Requirement already satisfied: matplotlib>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from prophet) (3.2.2)
Requirement already satisfied: LunarCalendar>=0.0.9 in /usr/local/lib/python3.7/dist-packages (from prophet) (0.0.9)
Requirement already satisfied: convertdate>=2.1.2 in /usr/local/lib/python3.7/dist-packages (from prophet) (2.3.2)
Requirement already satisfied: holidays>=0.10.2 in /usr/local/lib/python3.7/dist-packages (from prophet) (0.10.5.2)
Requirement already satisfied: setuptools-git>=1.2 in /usr/local/lib/python3.7/dist-packages (from prophet) (1.2)
Requirement already satisfied: python-dateutil>=2.8.0 in /usr/local/lib/python3.7/dist-packages (from prophet) (2.8.2)
Requirement already satisfied: tqdm>=4.36.1 in /usr/local/lib/python3.7/dist-packages (from prophet) (4.62.3)
Collecting ujson
Downloading ujson-5.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (43 kB)
|████████████████████████████████| 43 kB 1.8 MB/s
Requirement already satisfied: pytz>=2014.10 in /usr/local/lib/python3.7/dist-packages (from convertdate>=2.1.2->prophet) (2018.9)
Requirement already satisfied: pymeeus<=1,>=0.3.13 in /usr/local/lib/python3.7/dist-packages (from convertdate>=2.1.2->prophet) (0.5.11)
Requirement already satisfied: hijri-converter in /usr/local/lib/python3.7/dist-packages (from holidays>=0.10.2->prophet) (2.2.2)
Requirement already satisfied: korean-lunar-calendar in /usr/local/lib/python3.7/dist-packages (from holidays>=0.10.2->prophet) (0.2.1)
Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from holidays>=0.10.2->prophet) (1.15.0)
Requirement already satisfied: ephem>=3.7.5.3 in /usr/local/lib/python3.7/dist-packages (from LunarCalendar>=0.0.9->prophet) (4.1)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=2.0.0->prophet) (3.0.6)
Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=2.0.0->prophet) (1.3.2)
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=2.0.0->prophet) (0.11.0)
Building wheels for collected packages: prophet
Building wheel for prophet (setup.py) ... done
Created wheel for prophet: filename=prophet-1.0.1-py3-none-any.whl size=6640460 sha256=755467d22a01b9b415c556cacd04592045dbee570dfd7e1aa5d90493a7fc4e37
Stored in directory: /root/.cache/pip/wheels/4e/a0/1a/02c9ec9e3e9de6bdbb3d769d11992a6926889d71567d6b9b67
Successfully built prophet
Installing collected packages: ujson, cmdstanpy, prophet
Attempting uninstall: cmdstanpy
Found existing installation: cmdstanpy 0.9.5
Uninstalling cmdstanpy-0.9.5:
Successfully uninstalled cmdstanpy-0.9.5
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
fbprophet 0.7.1 requires cmdstanpy==0.9.5, but you have cmdstanpy 0.9.68 which is incompatible.
Successfully installed cmdstanpy-0.9.68 prophet-1.0.1 ujson-5.1.0
from prophet import Prophet
from prophet.plot import plot_plotly
import plotly.offline as py
py.init_notebook_mode()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('fivethirtyeight')
!kaggle datasets download -d sudalairajkumar/cryptocurrencypricehistory
Traceback (most recent call last):
File "/usr/local/bin/kaggle", line 5, in <module>
from kaggle.cli import main
File "/usr/local/lib/python2.7/dist-packages/kaggle/__init__.py", line 23, in <module>
api.authenticate()
File "/usr/local/lib/python2.7/dist-packages/kaggle/api/kaggle_api_extended.py", line 146, in authenticate
self.config_file, self.config_dir))
IOError: Could not find kaggle.json. Make sure it's located in /root/.kaggle. Or use the environment method.
import os
if not os.path.exists('/cryptocurrencypricehistory'):
!mkdir -p cryptocurrencypricehistory
!unzip -u "cryptocurrencypricehistory.zip" -d "cryptocurrencypricehistory/"
Archive: cryptocurrencypricehistory.zip inflating: cryptocurrencypricehistory/coin_Aave.csv inflating: cryptocurrencypricehistory/coin_BinanceCoin.csv inflating: cryptocurrencypricehistory/coin_Bitcoin.csv inflating: cryptocurrencypricehistory/coin_Cardano.csv inflating: cryptocurrencypricehistory/coin_ChainLink.csv inflating: cryptocurrencypricehistory/coin_Cosmos.csv inflating: cryptocurrencypricehistory/coin_CryptocomCoin.csv inflating: cryptocurrencypricehistory/coin_Dogecoin.csv inflating: cryptocurrencypricehistory/coin_EOS.csv inflating: cryptocurrencypricehistory/coin_Ethereum.csv inflating: cryptocurrencypricehistory/coin_Iota.csv inflating: cryptocurrencypricehistory/coin_Litecoin.csv inflating: cryptocurrencypricehistory/coin_Monero.csv inflating: cryptocurrencypricehistory/coin_NEM.csv inflating: cryptocurrencypricehistory/coin_Polkadot.csv inflating: cryptocurrencypricehistory/coin_Solana.csv inflating: cryptocurrencypricehistory/coin_Stellar.csv inflating: cryptocurrencypricehistory/coin_Tether.csv inflating: cryptocurrencypricehistory/coin_Tron.csv inflating: cryptocurrencypricehistory/coin_USDCoin.csv inflating: cryptocurrencypricehistory/coin_Uniswap.csv inflating: cryptocurrencypricehistory/coin_WrappedBitcoin.csv inflating: cryptocurrencypricehistory/coin_XRP.csv
# Load the Dogecoin price history, parsing 'Date' into datetimes, and keep
# only the two columns used for forecasting (timestamp and closing price).
doge_csv_path = "cryptocurrencypricehistory/coin_Dogecoin.csv"
df_DOGE = pd.read_csv(doge_csv_path, parse_dates=['Date'])[['Date', 'Close']]
df_DOGE.head(10)
| Date | Close | |
|---|---|---|
| 0 | 2013-12-16 23:59:59 | 0.000205 |
| 1 | 2013-12-17 23:59:59 | 0.000269 |
| 2 | 2013-12-18 23:59:59 | 0.000362 |
| 3 | 2013-12-19 23:59:59 | 0.001162 |
| 4 | 2013-12-20 23:59:59 | 0.000704 |
| 5 | 2013-12-21 23:59:59 | 0.000394 |
| 6 | 2013-12-22 23:59:59 | 0.000315 |
| 7 | 2013-12-23 23:59:59 | 0.000451 |
| 8 | 2013-12-24 23:59:59 | 0.000686 |
| 9 | 2013-12-25 23:59:59 | 0.000587 |
df_DOGE.tail(10)
| Date | Close | |
|---|---|---|
| 2750 | 2021-06-27 23:59:59 | 0.264450 |
| 2751 | 2021-06-28 23:59:59 | 0.256857 |
| 2752 | 2021-06-29 23:59:59 | 0.262769 |
| 2753 | 2021-06-30 23:59:59 | 0.254215 |
| 2754 | 2021-07-01 23:59:59 | 0.244549 |
| 2755 | 2021-07-02 23:59:59 | 0.245264 |
| 2756 | 2021-07-03 23:59:59 | 0.246411 |
| 2757 | 2021-07-04 23:59:59 | 0.246483 |
| 2758 | 2021-07-05 23:59:59 | 0.231614 |
| 2759 | 2021-07-06 23:59:59 | 0.234422 |
# Summarise column dtypes and non-null counts.
# `show_counts` only exists in pandas >= 1.2 and raises TypeError on the
# pandas 1.1.5 installed here. Non-null counts are shown by default for a
# frame this small, so omitting the keyword works on every pandas version.
df_DOGE.info(verbose=True)
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-23-60bf7ff5bfa0> in <module>() ----> 1 df_DOGE.info(verbose=True, show_counts=True) TypeError: info() got an unexpected keyword argument 'show_counts'
Prophet also imposes the strict condition that the input columns must be named `ds` (the date/timestamp column) and `y` (the numeric time series to forecast).
So, we must rename the columns in our dataframe.
# Prophet's fit() requires the history columns to be called 'ds' and 'y',
# so map the dataset's column names onto those.
prophet_column_names = {'Date': 'ds', 'Close': 'y'}
df_DOGE = df_DOGE.rename(columns=prophet_column_names)
df_DOGE.head()
| ds | y | |
|---|---|---|
| 0 | 2013-12-16 23:59:59 | 0.000205 |
| 1 | 2013-12-17 23:59:59 | 0.000269 |
| 2 | 2013-12-18 23:59:59 | 0.000362 |
| 3 | 2013-12-19 23:59:59 | 0.001162 |
| 4 | 2013-12-20 23:59:59 | 0.000704 |
df_DOGE.tail()
| ds | y | |
|---|---|---|
| 2755 | 2021-07-02 23:59:59 | 0.245264 |
| 2756 | 2021-07-03 23:59:59 | 0.246411 |
| 2757 | 2021-07-04 23:59:59 | 0.246483 |
| 2758 | 2021-07-05 23:59:59 | 0.231614 |
| 2759 | 2021-07-06 23:59:59 | 0.234422 |
# Plot the closing price over time, using the timestamp column as the x-axis.
close_by_date = df_DOGE.set_index('ds')
ax = close_by_date.plot(figsize=(12, 8))
ax.set(xlabel='Date', ylabel='Close Price')
plt.show()
The figure suggests that the data exhibit both a trend and seasonality.
# Hold out the most recent `prediction_size` rows (one row per day) so the
# forecast can later be compared against values the model never saw.
prediction_size = 60
train_df = df_DOGE.iloc[:-prediction_size]
train_df.tail()
| ds | y | |
|---|---|---|
| 2695 | 2021-05-03 23:59:59 | 0.441707 |
| 2696 | 2021-05-04 23:59:59 | 0.541334 |
| 2697 | 2021-05-05 23:59:59 | 0.657633 |
| 2698 | 2021-05-06 23:59:59 | 0.580804 |
| 2699 | 2021-05-07 23:59:59 | 0.684777 |
Now, we will describe how to use the Prophet library to predict future values of our time series data.
The developers of Prophet have made it more intuitive for analysts and developers alike to work with time series data.
To begin, we must instantiate a new Prophet object. Prophet enables us to specify a number of arguments. For example, we can specify the desired range of our uncertainty interval by setting the interval_width parameter.
help(Prophet)
Help on class Prophet in module prophet.forecaster: class Prophet(builtins.object) | Prophet(growth='linear', changepoints=None, n_changepoints=25, changepoint_range=0.8, yearly_seasonality='auto', weekly_seasonality='auto', daily_seasonality='auto', holidays=None, seasonality_mode='additive', seasonality_prior_scale=10.0, holidays_prior_scale=10.0, changepoint_prior_scale=0.05, mcmc_samples=0, interval_width=0.8, uncertainty_samples=1000, stan_backend=None) | | Prophet forecaster. | | Parameters | ---------- | growth: String 'linear' or 'logistic' to specify a linear or logistic | trend. | changepoints: List of dates at which to include potential changepoints. If | not specified, potential changepoints are selected automatically. | n_changepoints: Number of potential changepoints to include. Not used | if input `changepoints` is supplied. If `changepoints` is not supplied, | then n_changepoints potential changepoints are selected uniformly from | the first `changepoint_range` proportion of the history. | changepoint_range: Proportion of history in which trend changepoints will | be estimated. Defaults to 0.8 for the first 80%. Not used if | `changepoints` is specified. | yearly_seasonality: Fit yearly seasonality. | Can be 'auto', True, False, or a number of Fourier terms to generate. | weekly_seasonality: Fit weekly seasonality. | Can be 'auto', True, False, or a number of Fourier terms to generate. | daily_seasonality: Fit daily seasonality. | Can be 'auto', True, False, or a number of Fourier terms to generate. | holidays: pd.DataFrame with columns holiday (string) and ds (date type) | and optionally columns lower_window and upper_window which specify a | range of days around the date to be included as holidays. | lower_window=-2 will include 2 days prior to the date as holidays. Also | optionally can have a column prior_scale specifying the prior scale for | that holiday. | seasonality_mode: 'additive' (default) or 'multiplicative'. 
| seasonality_prior_scale: Parameter modulating the strength of the | seasonality model. Larger values allow the model to fit larger seasonal | fluctuations, smaller values dampen the seasonality. Can be specified | for individual seasonalities using add_seasonality. | holidays_prior_scale: Parameter modulating the strength of the holiday | components model, unless overridden in the holidays input. | changepoint_prior_scale: Parameter modulating the flexibility of the | automatic changepoint selection. Large values will allow many | changepoints, small values will allow few changepoints. | mcmc_samples: Integer, if greater than 0, will do full Bayesian inference | with the specified number of MCMC samples. If 0, will do MAP | estimation. | interval_width: Float, width of the uncertainty intervals provided | for the forecast. If mcmc_samples=0, this will be only the uncertainty | in the trend using the MAP estimate of the extrapolated generative | model. If mcmc.samples>0, this will be integrated over all model | parameters, which will include uncertainty in seasonality. | uncertainty_samples: Number of simulated draws used to estimate | uncertainty intervals. Settings this value to 0 or False will disable | uncertainty estimation and speed up the calculation. | stan_backend: str as defined in StanBackendEnum default: None - will try to | iterate over all available backends and find the working one | | Methods defined here: | | __init__(self, growth='linear', changepoints=None, n_changepoints=25, changepoint_range=0.8, yearly_seasonality='auto', weekly_seasonality='auto', daily_seasonality='auto', holidays=None, seasonality_mode='additive', seasonality_prior_scale=10.0, holidays_prior_scale=10.0, changepoint_prior_scale=0.05, mcmc_samples=0, interval_width=0.8, uncertainty_samples=1000, stan_backend=None) | Initialize self. See help(type(self)) for accurate signature. | | add_country_holidays(self, country_name) | Add in built-in holidays for the specified country. 
| | These holidays will be included in addition to any specified on model | initialization. | | Holidays will be calculated for arbitrary date ranges in the history | and future. See the online documentation for the list of countries with | built-in holidays. | | Built-in country holidays can only be set for a single country. | | Parameters | ---------- | country_name: Name of the country, like 'UnitedStates' or 'US' | | Returns | ------- | The prophet object. | | add_group_component(self, components, name, group) | Adds a component with given name that contains all of the components | in group. | | Parameters | ---------- | components: Dataframe with components. | name: Name of new group component. | group: List of components that form the group. | | Returns | ------- | Dataframe with components. | | add_regressor(self, name, prior_scale=None, standardize='auto', mode=None) | Add an additional regressor to be used for fitting and predicting. | | The dataframe passed to `fit` and `predict` will have a column with the | specified name to be used as a regressor. When standardize='auto', the | regressor will be standardized unless it is binary. The regression | coefficient is given a prior with the specified scale parameter. | Decreasing the prior scale will add additional regularization. If no | prior scale is provided, self.holidays_prior_scale will be used. | Mode can be specified as either 'additive' or 'multiplicative'. If not | specified, self.seasonality_mode will be used. 'additive' means the | effect of the regressor will be added to the trend, 'multiplicative' | means it will multiply the trend. | | Parameters | ---------- | name: string name of the regressor. | prior_scale: optional float scale for the normal prior. If not | provided, self.holidays_prior_scale will be used. | standardize: optional, specify whether this regressor will be | standardized prior to fitting. Can be 'auto' (standardize if not | binary), True, or False. 
| mode: optional, 'additive' or 'multiplicative'. Defaults to | self.seasonality_mode. | | Returns | ------- | The prophet object. | | add_seasonality(self, name, period, fourier_order, prior_scale=None, mode=None, condition_name=None) | Add a seasonal component with specified period, number of Fourier | components, and prior scale. | | Increasing the number of Fourier components allows the seasonality to | change more quickly (at risk of overfitting). Default values for yearly | and weekly seasonalities are 10 and 3 respectively. | | Increasing prior scale will allow this seasonality component more | flexibility, decreasing will dampen it. If not provided, will use the | seasonality_prior_scale provided on Prophet initialization (defaults | to 10). | | Mode can be specified as either 'additive' or 'multiplicative'. If not | specified, self.seasonality_mode will be used (defaults to additive). | Additive means the seasonality will be added to the trend, | multiplicative means it will multiply the trend. | | If condition_name is provided, the dataframe passed to `fit` and | `predict` should have a column with the specified condition_name | containing booleans which decides when to apply seasonality. | | Parameters | ---------- | name: string name of the seasonality component. | period: float number of days in one period. | fourier_order: int number of Fourier components to use. | prior_scale: optional float prior scale for this component. | mode: optional 'additive' or 'multiplicative' | condition_name: string name of the seasonality condition. | | Returns | ------- | The prophet object. | | construct_holiday_dataframe(self, dates) | Construct a dataframe of holiday dates. | | Will combine self.holidays with the built-in country holidays | corresponding to input dates, if self.country_holidays is set. | | Parameters | ---------- | dates: pd.Series containing timestamps used for computing seasonality. 
| | Returns | ------- | dataframe of holiday dates, in holiday dataframe format used in | initialization. | | fit(self, df, **kwargs) | Fit the Prophet model. | | This sets self.params to contain the fitted model parameters. It is a | dictionary parameter names as keys and the following items: | k (Mx1 array): M posterior samples of the initial slope. | m (Mx1 array): The initial intercept. | delta (MxN array): The slope change at each of N changepoints. | beta (MxK matrix): Coefficients for K seasonality features. | sigma_obs (Mx1 array): Noise level. | Note that M=1 if MAP estimation. | | Parameters | ---------- | df: pd.DataFrame containing the history. Must have columns ds (date | type) and y, the time series. If self.growth is 'logistic', then | df must also have a column cap that specifies the capacity at | each ds. | kwargs: Additional arguments passed to the optimizing or sampling | functions in Stan. | | Returns | ------- | The fitted Prophet object. | | initialize_scales(self, initialize_scales, df) | Initialize model scales. | | Sets model scaling factors using df. | | Parameters | ---------- | initialize_scales: Boolean set the scales or not. | df: pd.DataFrame for setting scales. | | make_all_seasonality_features(self, df) | Dataframe with seasonality features. | | Includes seasonality features, holiday features, and added regressors. | | Parameters | ---------- | df: pd.DataFrame with dates for computing seasonality features and any | added regressors. | | Returns | ------- | pd.DataFrame with regression features. | list of prior scales for each column of the features dataframe. | Dataframe with indicators for which regression components correspond to | which columns. | Dictionary with keys 'additive' and 'multiplicative' listing the | component names for each mode of seasonality. | | make_future_dataframe(self, periods, freq='D', include_history=True) | Simulate the trend using the extrapolated generative model. 
| | Parameters | ---------- | periods: Int number of periods to forecast forward. | freq: Any valid frequency for pd.date_range, such as 'D' or 'M'. | include_history: Boolean to include the historical dates in the data | frame for predictions. | | Returns | ------- | pd.Dataframe that extends forward from the end of self.history for the | requested number of periods. | | make_holiday_features(self, dates, holidays) | Construct a dataframe of holiday features. | | Parameters | ---------- | dates: pd.Series containing timestamps used for computing seasonality. | holidays: pd.Dataframe containing holidays, as returned by | construct_holiday_dataframe. | | Returns | ------- | holiday_features: pd.DataFrame with a column for each holiday. | prior_scale_list: List of prior scales for each holiday column. | holiday_names: List of names of holidays | | parse_seasonality_args(self, name, arg, auto_disable, default_order) | Get number of fourier components for built-in seasonalities. | | Parameters | ---------- | name: string name of the seasonality component. | arg: 'auto', True, False, or number of fourier components as provided. | auto_disable: bool if seasonality should be disabled when 'auto'. | default_order: int default fourier order | | Returns | ------- | Number of fourier components, or 0 for disabled. | | percentile(self, a, *args, **kwargs) | We rely on np.nanpercentile in the rare instances where there | are a small number of bad samples with MCMC that contain NaNs. | However, since np.nanpercentile is far slower than np.percentile, | we only fall back to it if the array contains NaNs. See | https://github.com/facebook/prophet/issues/1310 for more details. | | plot(self, fcst, ax=None, uncertainty=True, plot_cap=True, xlabel='ds', ylabel='y', figsize=(10, 6)) | Plot the Prophet forecast. | | Parameters | ---------- | fcst: pd.DataFrame output of self.predict. | ax: Optional matplotlib axes on which to plot. 
| uncertainty: Optional boolean to plot uncertainty intervals. | plot_cap: Optional boolean indicating if the capacity should be shown | in the figure, if available. | xlabel: Optional label name on X-axis | ylabel: Optional label name on Y-axis | figsize: Optional tuple width, height in inches. | | Returns | ------- | A matplotlib figure. | | plot_components(self, fcst, uncertainty=True, plot_cap=True, weekly_start=0, yearly_start=0, figsize=None) | Plot the Prophet forecast components. | | Will plot whichever are available of: trend, holidays, weekly | seasonality, and yearly seasonality. | | Parameters | ---------- | fcst: pd.DataFrame output of self.predict. | uncertainty: Optional boolean to plot uncertainty intervals. | plot_cap: Optional boolean indicating if the capacity should be shown | in the figure, if available. | weekly_start: Optional int specifying the start day of the weekly | seasonality plot. 0 (default) starts the week on Sunday. 1 shifts | by 1 day to Monday, and so on. | yearly_start: Optional int specifying the start day of the yearly | seasonality plot. 0 (default) starts the year on Jan 1. 1 shifts | by 1 day to Jan 2, and so on. | figsize: Optional tuple width, height in inches. | | Returns | ------- | A matplotlib figure. | | predict(self, df=None) | Predict using the prophet model. | | Parameters | ---------- | df: pd.DataFrame with dates for predictions (column ds), and capacity | (column cap) if logistic growth. If not provided, predictions are | made on the history. | | Returns | ------- | A pd.DataFrame with the forecast components. | | predict_seasonal_components(self, df) | Predict seasonality components, holidays, and added regressors. | | Parameters | ---------- | df: Prediction dataframe. | | Returns | ------- | Dataframe with seasonal components. | | predict_trend(self, df) | Predict trend using the prophet model. | | Parameters | ---------- | df: Prediction dataframe. 
| | Returns | ------- | Vector with trend on prediction dates. | | predict_uncertainty(self, df) | Prediction intervals for yhat and trend. | | Parameters | ---------- | df: Prediction dataframe. | | Returns | ------- | Dataframe with uncertainty intervals. | | predictive_samples(self, df) | Sample from the posterior predictive distribution. Returns samples | for the main estimate yhat, and for the trend component. The shape of | each output will be (nforecast x nsamples), where nforecast is the | number of points being forecasted (the number of rows in the input | dataframe) and nsamples is the number of posterior samples drawn. | This is the argument `uncertainty_samples` in the Prophet constructor, | which defaults to 1000. | | Parameters | ---------- | df: Dataframe with dates for predictions (column ds), and capacity | (column cap) if logistic growth. | | Returns | ------- | Dictionary with keys "trend" and "yhat" containing | posterior predictive samples for that component. | | regressor_column_matrix(self, seasonal_features, modes) | Dataframe indicating which columns of the feature matrix correspond | to which seasonality/regressor components. | | Includes combination components, like 'additive_terms'. These | combination components will be added to the 'modes' input. | | Parameters | ---------- | seasonal_features: Constructed seasonal features dataframe | modes: Dictionary with keys 'additive' and 'multiplicative' listing the | component names for each mode of seasonality. | | Returns | ------- | component_cols: A binary indicator dataframe with columns seasonal | components and rows columns in seasonal_features. Entry is 1 if | that columns is used in that component. | modes: Updated input with combination components. | | sample_model(self, df, seasonal_features, iteration, s_a, s_m) | Simulate observations from the extrapolated generative model. | | Parameters | ---------- | df: Prediction dataframe. 
| seasonal_features: pd.DataFrame of seasonal features. | iteration: Int sampling iteration to use parameters from. | s_a: Indicator vector for additive components | s_m: Indicator vector for multiplicative components | | Returns | ------- | Dataframe with trend and yhat, each like df['t']. | | sample_posterior_predictive(self, df) | Prophet posterior predictive samples. | | Parameters | ---------- | df: Prediction dataframe. | | Returns | ------- | Dictionary with posterior predictive samples for the forecast yhat and | for the trend component. | | sample_predictive_trend(self, df, iteration) | Simulate the trend using the extrapolated generative model. | | Parameters | ---------- | df: Prediction dataframe. | iteration: Int sampling iteration to use parameters from. | | Returns | ------- | np.array of simulated trend over df['t']. | | set_auto_seasonalities(self) | Set seasonalities that were left on auto. | | Turns on yearly seasonality if there is >=2 years of history. | Turns on weekly seasonality if there is >=2 weeks of history, and the | spacing between dates in the history is <7 days. | Turns on daily seasonality if there is >=2 days of history, and the | spacing between dates in the history is <1 day. | | set_changepoints(self) | Set changepoints | | Sets m$changepoints to the dates of changepoints. Either: | 1) The changepoints were passed in explicitly. | A) They are empty. | B) They are not empty, and need validation. | 2) We are generating a grid of them. | 3) The user prefers no changepoints be used. | | setup_dataframe(self, df, initialize_scales=False) | Prepare dataframe for fitting or predicting. | | Adds a time index and scales y. Creates auxiliary columns 't', 't_ix', | 'y_scaled', and 'cap_scaled'. These columns are used during both | fitting and predicting. | | Parameters | ---------- | df: pd.DataFrame with columns ds, y, and cap if logistic growth. Any | specified additional regressors must also be present. 
| initialize_scales: Boolean set scaling factors in self from df. | | Returns | ------- | pd.DataFrame prepared for fitting or predicting. | | validate_column_name(self, name, check_holidays=True, check_seasonalities=True, check_regressors=True) | Validates the name of a seasonality, holiday, or regressor. | | Parameters | ---------- | name: string | check_holidays: bool check if name already used for holiday | check_seasonalities: bool check if name already used for seasonality | check_regressors: bool check if name already used for regressor | | validate_inputs(self) | Validates the inputs to Prophet. | | ---------------------------------------------------------------------- | Class methods defined here: | | make_seasonality_features(dates, period, series_order, prefix) from builtins.type | Data frame with seasonality features. | | Parameters | ---------- | cls: Prophet class. | dates: pd.Series containing timestamps. | period: Number of days of the period. | series_order: Number of components. | prefix: Column name prefix. | | Returns | ------- | pd.DataFrame with seasonality features. | | ---------------------------------------------------------------------- | Static methods defined here: | | flat_growth_init(df) | Initialize flat growth. | | Provides a strong initialization for flat growth. Sets the growth to 0 | and offset parameter as mean of history y_scaled values. | | Parameters | ---------- | df: pd.DataFrame with columns ds (date), y_scaled (scaled time series), | and t (scaled time). | | Returns | ------- | A tuple (k, m) with the rate (k) and offset (m) of the linear growth | function. | | flat_trend(t, m) | Evaluate the flat trend function. | | Parameters | ---------- | t: np.array of times on which the function is evaluated. | m: Float initial offset. | | Returns | ------- | Vector y(t). | | fourier_series(dates, period, series_order) | Provides Fourier series components with the specified frequency | and order. 
| | Parameters | ---------- | dates: pd.Series containing timestamps. | period: Number of days of the period. | series_order: Number of components. | | Returns | ------- | Matrix with seasonality features. | | linear_growth_init(df) | Initialize linear growth. | | Provides a strong initialization for linear growth by calculating the | growth and offset parameters that pass the function through the first | and last points in the time series. | | Parameters | ---------- | df: pd.DataFrame with columns ds (date), y_scaled (scaled time series), | and t (scaled time). | | Returns | ------- | A tuple (k, m) with the rate (k) and offset (m) of the linear growth | function. | | logistic_growth_init(df) | Initialize logistic growth. | | Provides a strong initialization for logistic growth by calculating the | growth and offset parameters that pass the function through the first | and last points in the time series. | | Parameters | ---------- | df: pd.DataFrame with columns ds (date), cap_scaled (scaled capacity), | y_scaled (scaled time series), and t (scaled time). | | Returns | ------- | A tuple (k, m) with the rate (k) and offset (m) of the logistic growth | function. | | piecewise_linear(t, deltas, k, m, changepoint_ts) | Evaluate the piecewise linear function. | | Parameters | ---------- | t: np.array of times on which the function is evaluated. | deltas: np.array of rate changes at each changepoint. | k: Float initial rate. | m: Float initial offset. | changepoint_ts: np.array of changepoint times. | | Returns | ------- | Vector y(t). | | piecewise_logistic(t, cap, deltas, k, m, changepoint_ts) | Evaluate the piecewise logistic function. | | Parameters | ---------- | t: np.array of times on which the function is evaluated. | cap: np.array of capacities at each t. | deltas: np.array of rate changes at each changepoint. | k: Float initial rate. | m: Float initial offset. | changepoint_ts: np.array of changepoint times. | | Returns | ------- | Vector y(t). 
| | ---------------------------------------------------------------------- | Data descriptors defined here: | | __dict__ | dictionary for instance variables (if defined) | | __weakref__ | list of weak references to the object (if defined)
# Set the uncertainty interval to 95% (the Prophet default is 80%).
# mcmc_samples is left at its default of 0, so per the Prophet help text the
# interval reflects only the uncertainty in the trend (MAP estimate), not the
# uncertainty in seasonality.
model = Prophet(interval_width=0.95)
help(Prophet.fit)
Help on function fit in module prophet.forecaster:
fit(self, df, **kwargs)
Fit the Prophet model.
This sets self.params to contain the fitted model parameters. It is a
dictionary parameter names as keys and the following items:
k (Mx1 array): M posterior samples of the initial slope.
m (Mx1 array): The initial intercept.
delta (MxN array): The slope change at each of N changepoints.
beta (MxK matrix): Coefficients for K seasonality features.
sigma_obs (Mx1 array): Noise level.
Note that M=1 if MAP estimation.
Parameters
----------
df: pd.DataFrame containing the history. Must have columns ds (date
type) and y, the time series. If self.growth is 'logistic', then
df must also have a column cap that specifies the capacity at
each ds.
kwargs: Additional arguments passed to the optimizing or sampling
functions in Stan.
Returns
-------
The fitted Prophet object.
# Fit on the training split only, so the last `prediction_size` rows of
# df_DOGE remain unseen by the model.
model.fit(train_df)
INFO:numexpr.utils:NumExpr defaulting to 2 threads. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
<prophet.forecaster.Prophet at 0x7f46a4e1c190>
In order to obtain forecasts of our time series, we must provide Prophet with a new DataFrame containing a ds column that holds the dates for which we want predictions.
Conveniently, we do not have to concern ourselves with manually creating this DataFrame, as Prophet provides the make_future_dataframe helper function.
# Build the frame of dates to predict. include_history defaults to True, so
# the result holds every training date plus `prediction_size` additional
# daily ('D') rows after the end of the training history.
future_dates = model.make_future_dataframe(periods=prediction_size, freq='D')
future_dates.head()
| ds | |
|---|---|
| 0 | 2013-12-16 23:59:59 |
| 1 | 2013-12-17 23:59:59 |
| 2 | 2013-12-18 23:59:59 |
| 3 | 2013-12-19 23:59:59 |
| 4 | 2013-12-20 23:59:59 |
# Predict for every date in future_dates (history plus the forecast horizon).
forecast = model.predict(future_dates)
# Show only the date, the main estimate (yhat) and the bounds of its
# uncertainty interval (width 0.95, as configured on the model).
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].head()
| ds | yhat | yhat_lower | yhat_upper | |
|---|---|---|---|---|
| 0 | 2013-12-16 23:59:59 | -0.006189 | -0.064728 | 0.054731 |
| 1 | 2013-12-17 23:59:59 | -0.006196 | -0.064768 | 0.054439 |
| 2 | 2013-12-18 23:59:59 | -0.005709 | -0.063022 | 0.056439 |
| 3 | 2013-12-19 23:59:59 | -0.005752 | -0.067642 | 0.058568 |
| 4 | 2013-12-20 23:59:59 | -0.004876 | -0.065562 | 0.051039 |
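Prophet returns a large DataFrame with many interesting columns, but we subset our output to the columns most relevant to forecasting. These are: `ds` (the date of the prediction), `yhat` (the predicted value), and `yhat_lower` / `yhat_upper` (the lower and upper bounds of the uncertainty interval).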
# Plot the forecast together with its uncertainty interval.
model.plot(forecast, uncertainty=True)
plt.show()
# Plot the individual forecast components (per the Prophet help text:
# whichever of trend, holidays, weekly and yearly seasonality are available).
model.plot_components(forecast)
plt.show()
from prophet.plot import add_changepoints_to_plot
fig = model.plot(forecast)
a = add_changepoints_to_plot(fig.gca(), model, forecast)
We can view the dates where the changepoints occurred.
# Evaluates to the changepoint dates; the recorded output is a datetime64 Series named `ds`.
model.changepoints
86 2014-03-12 23:59:59 173 2014-06-07 23:59:59 259 2014-09-01 23:59:59 345 2014-11-26 23:59:59 432 2015-02-21 23:59:59 518 2015-05-18 23:59:59 605 2015-08-13 23:59:59 691 2015-11-07 23:59:59 777 2016-02-01 23:59:59 864 2016-04-28 23:59:59 950 2016-07-23 23:59:59 1036 2016-10-17 23:59:59 1123 2017-01-12 23:59:59 1209 2017-04-08 23:59:59 1295 2017-07-03 23:59:59 1382 2017-09-28 23:59:59 1468 2017-12-23 23:59:59 1554 2018-03-19 23:59:59 1641 2018-06-14 23:59:59 1727 2018-09-08 23:59:59 1814 2018-12-04 23:59:59 1900 2019-02-28 23:59:59 1986 2019-05-25 23:59:59 2073 2019-08-20 23:59:59 2159 2019-11-14 23:59:59 Name: ds, dtype: datetime64[ns]
# New model with changepoint_range=0.9, fitted, predicted over future_dates and plotted
# with its changepoints overlaid.
# NOTE(review): this fits on df_DOGE, not the train_df used for `model`, and rebinds the
# module-level `forecast` — confirm that both are intended.
pro_change= Prophet(changepoint_range=0.9)
forecast = pro_change.fit(df_DOGE).predict(future_dates)
fig= pro_change.plot(forecast);
a = add_changepoints_to_plot(fig.gca(), pro_change, forecast)
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
The number of changepoints can be set with the `n_changepoints` parameter when initializing Prophet.
# Fit-and-plot with n_changepoints=20 and yearly_seasonality=True.
# Rebinds the module-level `pro_change`, `forecast`, `fig` and `a`.
pro_change= Prophet(n_changepoints=20, yearly_seasonality=True)
forecast = pro_change.fit(df_DOGE).predict(future_dates)
fig= pro_change.plot(forecast);
a = add_changepoints_to_plot(fig.gca(), pro_change, forecast)
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
# Fit-and-plot with n_changepoints=20, yearly_seasonality=True and changepoint_prior_scale=0.08.
# Rebinds the module-level `pro_change`, `forecast`, `fig` and `a`.
pro_change= Prophet(n_changepoints=20, yearly_seasonality=True, changepoint_prior_scale=0.08)
forecast = pro_change.fit(df_DOGE).predict(future_dates)
fig= pro_change.plot(forecast);
a = add_changepoints_to_plot(fig.gca(), pro_change, forecast)
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
# Fit-and-plot with n_changepoints=20, yearly_seasonality=True and changepoint_prior_scale=0.001.
# NOTE(review): `forecast` is rebound here from a model fitted on df_DOGE. If the cells run
# in order, this is the forecast that make_comparison_dataframe(df_DOGE, forecast) scores
# further down, so the "held-out" tail may have been seen during fitting — confirm, or fit
# on train_df instead.
pro_change= Prophet(n_changepoints=20, yearly_seasonality=True, changepoint_prior_scale=0.001)
forecast = pro_change.fit(df_DOGE).predict(future_dates)
fig= pro_change.plot(forecast);
a = add_changepoints_to_plot(fig.gca(), pro_change, forecast)
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
def make_comparison_dataframe(historical, forecast):
    """Combine the forecast values with the observed history, aligned on 'ds'.

    Both frames are re-indexed by their 'ds' column. The forecast columns
    'yhat', 'yhat_lower' and 'yhat_upper' are kept as the calling side of the
    join, and the remaining columns of `historical` (such as 'y') are
    attached to them.
    """
    forecast_columns = ['yhat', 'yhat_lower', 'yhat_upper']
    predicted = forecast.set_index('ds')[forecast_columns]
    observed = historical.set_index('ds')
    return predicted.join(observed)
# Preview the first rows of the history frame; the recorded output has columns ds and y.
df_DOGE.head()
| ds | y | |
|---|---|---|
| 0 | 2013-12-16 23:59:59 | 0.000205 |
| 1 | 2013-12-17 23:59:59 | 0.000269 |
| 2 | 2013-12-18 23:59:59 | 0.000362 |
| 3 | 2013-12-19 23:59:59 | 0.001162 |
| 4 | 2013-12-20 23:59:59 | 0.000704 |
# Attach the observed y from df_DOGE to the forecast columns, indexed by ds.
cmp_df = make_comparison_dataframe(df_DOGE, forecast)
# Show the last three rows.
cmp_df.tail(n=3)
| yhat | yhat_lower | yhat_upper | y | |
|---|---|---|---|---|
| ds | ||||
| 2021-07-04 23:59:59 | 0.056681 | -0.015783 | 0.128706 | 0.246483 |
| 2021-07-05 23:59:59 | 0.055072 | -0.026899 | 0.121952 | 0.231614 |
| 2021-07-06 23:59:59 | 0.053761 | -0.010176 | 0.122250 | 0.234422 |
We are also going to define a helper function that we will use to gauge the quality of our forecasting with MAPE, MSE, RMSE, and MAE error measures:
def calculate_forecast_errors(df, prediction_size):
    """Calculate MAPE, MSE, RMSE and MAE of the forecast on the held-out period.

    All four measures are computed over the same rows: the last
    `prediction_size` rows of `df`. (Previously MSE and RMSE were averaged
    over the whole frame while MAPE and MAE used only the tail, so the
    measures were not comparable with each other.)

    Args:
        df: joined dataset with 'y' and 'yhat' columns. It is not modified.
        prediction_size: number of days at the end to predict.

    Returns:
        dict with the keys 'MAPE', 'MSE', 'RMSE' and 'MAE', in that order.
        MAPE is expressed in percent.
    """
    # Recall that we held out the values of the last `prediction_size` days
    # in order to predict them and measure the quality of the model, so only
    # that part of the data is scored.
    predicted_part = df.iloc[-prediction_size:]

    # e_i = y_i - yhat_i and p_i = 100 * e_i / y_i. A row with y == 0 makes
    # its percentage error non-finite.
    errors = predicted_part['y'] - predicted_part['yhat']
    percentage_errors = 100 * errors / predicted_part['y']

    mse = np.mean(errors ** 2)
    # RMSE is the square root of the MSE over the same rows.
    rmse = np.sqrt(mse)

    return {
        'MAPE': np.mean(np.abs(percentage_errors)),
        'MSE': mse,
        'RMSE': rmse,
        'MAE': np.mean(np.abs(errors)),
    }
# Print each error measure's name followed by its value.
for err_name, err_value in calculate_forecast_errors(cmp_df, prediction_size).items():
print(err_name, err_value)
MAPE 76.07724986558699 MSE 0.0031508023450342398 RMSE 0.05613200820418097 MAE 0.26347108422289384
from plotly.offline import init_notebook_mode, iplot
from plotly import graph_objs as go
def show_forecast(cmp_df, num_predictions, num_values, title, yaxis_title='Posts'):
    """Visualize the forecast, its uncertainty band and the actual values.

    Args:
        cmp_df: frame with 'yhat', 'yhat_lower', 'yhat_upper' and 'y'
            columns; its index is used as the x axis.
        num_predictions: number of trailing rows drawn for the forecast and
            for its lower/upper bounds.
        num_values: number of trailing rows drawn for the actual values.
        title: title of the figure.
        yaxis_title: label of the y axis. Defaults to 'Posts', the value
            that used to be hard-coded, so existing calls are unaffected.
    """
    def create_go(name, column, num, **kwargs):
        """Build a line trace from the last `num` rows of `column`."""
        points = cmp_df.tail(num)
        args = dict(name=name, x=points.index, y=points[column], mode='lines')
        args.update(kwargs)
        return go.Scatter(**args)

    # The bounds are drawn with zero-width lines; only the fill between
    # them is meant to be visible.
    lower_bound = create_go('Lower Bound', 'yhat_lower', num_predictions,
                            line=dict(width=0),
                            marker=dict(color="gray"))
    upper_bound = create_go('Upper Bound', 'yhat_upper', num_predictions,
                            line=dict(width=0),
                            marker=dict(color="gray"),
                            fillcolor='rgba(68, 68, 68, 0.3)',
                            fill='tonexty')
    forecast = create_go('Forecast', 'yhat', num_predictions,
                         line=dict(color='rgb(31, 119, 180)'))
    actual = create_go('Actual', 'y', num_values,
                       marker=dict(color="red"))

    # In this case the order of the series is important because of the filling
    data = [lower_bound, upper_bound, forecast, actual]

    layout = go.Layout(yaxis=dict(title=yaxis_title), title=title, showlegend=False)
    fig = go.Figure(data=data, layout=layout)
    iplot(fig, show_link=False)
# Draw the forecast and its bounds over the last prediction_size rows and the actual values over the last 100 rows; no title.
show_forecast(cmp_df, prediction_size, 100, None)